---
title: "gov2020-repl-project"
format: pdf
editor: visual
---

## Packages

```{r}

library(tidyverse)
library(broom)
library(estimatr)
library(margins)
library(lme4)

# remotes::install_github("markwestcott34/stargazer-booktabs")
library(stargazer)

```

## Import Data

```{r}

panel_year <- read.csv("data.csv")

```

## New Variables

```{r}

# Function to check if of age during a conflict
of_age_when <- function(conflict_years, birth_year, age_range = 18:40) {
  min_year <- min(conflict_years)
  max_year <- max(conflict_years)
  birth_age_min <- min_year - birth_year
  birth_age_max <- max_year - birth_year
  return(birth_age_min %in% age_range | birth_age_max %in% age_range)
}

# Function to create combat instrument
create_combat_instrument <- function(cohort, combat_str, combat_pattern) {
  cohort * (str_detect(combat_str, combat_pattern))
}

# Refactored cohort_frame
cohort_frame <- panel_year %>%
  mutate(
    promotion_after_1949 = ifelse(!is.na(shangjiang_year), 1, 0),
    promotion_after_1988 = ifelse(is.na(shangjiang_year) | shangjiang_year < 1988, 0, 1),
    promotion_before_1955 = ifelse(is.na(shangjiang_year) | shangjiang_year > 1955, 0, 1),
    
    long_march_cohort = of_age_when(1934:1935, birth_year, age_range = 16:40),
    combat_japan_cohort = of_age_when(1937:1945, birth_year, age_range = 16:40),
    combat_communist_cohort = of_age_when(1946:1950, birth_year, age_range = 16:40),
    combat_korea_cohort = of_age_when(1950:1953, birth_year),
    combat_vietnam_cohort = of_age_when(1964:1979, birth_year),
    combat_india_cohort = of_age_when(1962, birth_year),
    
    fought_korea = str_detect(combat, "Korea|Korean"),
    fought_vietnam = str_detect(combat, "Vietnam|Vietnamese"),
    fought_communist = str_detect(combat, "Communist"),
    fought_japan = str_detect(combat, "Japan|Japanese"),
    fought_india = str_detect(combat, "India|Indian"),
    
    last_conflict_year = case_when(
      fought_vietnam ~ 1975,
      fought_korea ~ 1953,
      fought_india ~ 1962,
      fought_communist ~ 1950,
      fought_japan ~ 1945,
      TRUE ~ NA_real_
    ),
    
    age_when_shangjiang = shangjiang_year - birth_year,
    
    missed_all_eligibility = as.integer(
      !(birth_year %in% c(1904:1920, 1920:1935, 1949:1961, 1907:1929, 1900:1922, 1919:1939))
    ),
    any_combat = fought_korea + fought_vietnam + fought_india + fought_communist + fought_japan + participated_long_march,
    combat_pre_1955 = fought_korea + fought_japan + fought_communist + participated_long_march,
    combat_post_1955 = fought_vietnam + fought_india
  ) %>%
  mutate(
    pre_prc_cohort = as.integer(birth_year %in% 1894:1933),
    post_prc_cohort = as.integer(birth_year %in% 1910:1959)
  ) 

```

## Restructure Data

```{r}

df <- cohort_frame %>% 
  dplyr::select(name, unique_id, birth_year, general_rank, promotion_after_1949, any_combat, combat_pre_1955, combat_post_1949, promotion_before_1955, promotion_after_1988, ends_with(c("instrument")), participated_long_march, starts_with(c("fought")), ends_with(c("cohort")), ends_with(c("subset")), cmc_chair_connection, shangjiang_year, age_when_shangjiang, education_level, starts_with(c("parent_")), birth_prefecture, birth_province, ends_with(".network"), combat_post_1955, cohort_decade) %>% 
  distinct()

standardise <- function(x){
  return((x - mean(x, na.rm = T))/sd(x))
}

df %<>% group_by(cohort_decade) %>%
  mutate(
    std_com_pre = standardise(combat_pre_1955),
    std_com_post = standardise(any_combat)
  )
```

## Base Model

```{r}

# Promotion to general ~ combat before and after 1949

m1 <- lm(promotion_after_1949 ~ any_combat, data = df)

summary(m1)

m2 <- lm(promotion_before_1955 ~ combat_pre_1955 + as.factor(cohort_decade), data = df)

summary(m2)

m3 <- lm(promotion_after_1988 ~ any_combat  + as.factor(cohort_decade), data = df %>% filter(!promotion_before_1955 == 1))

summary(m3)

```

```{r, eval = F}

tidy_m2 <- tidy(m2, conf.int = TRUE)
tidy_m3 <- tidy(m3, conf.int = TRUE)

tidy_m2 <- tidy_m2 %>% mutate(model = "Promotion Before 1955")
tidy_m3 <- tidy_m3 %>% mutate(model = "Promotion After 1988")

tidy_frame <- tidy_m2 %>% 
  bind_rows(tidy_m3) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(model = factor(model, levels = c("Promotion Before 1955", "Promotion After 1988")))

ggplot(tidy_frame, aes(x = model, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, lty = "dashed", color = "darkgrey") +
  geom_pointrange() +
  labs(
    title = "Coefficient Plot for Promotion Models",
    x = "Model",
    y = "Log Odds (Coefficient)"
  ) +
  theme_bw() +
  scale_y_continuous(limits = c(-2, 2)) +
  labs(y = "Promotion Probability (Log Odds)",
       x = NULL,
       title = "")

ggsave(filename = "output/base_logodds.pdf", width = 6, height = 4)

mar1 <- margins(m2) %>% summary
mar2 <- margins(m3) %>% summary

t <- rbind(mar1, mar2) %>% 
  filter(factor %in% c("combat_pre_1955", "any_combat")) %>%
  mutate(`factor` = `factor` %>% factor(., levels = .))

ggplot(t, aes(x = factor, y = AME, ymin = lower, ymax = upper)) + geom_hline(yintercept = 0, lty = "dashed", color = "darkgrey") +
  geom_pointrange() +
  scale_x_discrete(labels = c("Pre-", "Post-")) +
  scale_y_continuous(labels = scales::percent_format(suffix = "pp")) +
  labs(
    title = "Marginal Effect of Combat Experience on Promotion Probability",
    x = "State Consolidation",
    y = NULL
  ) +
  theme_bw()
  # scale_y_continuous(limits = c(-2, 2)) +

ggsave(filename = "output/base_ames.pdf", height = 4, width = 6)

```

## Expanded Model 0A (+Birth cohort)

```{r}

m2aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + (combat_pre_1955  - 1 | cohort_decade), family = "binomial", data = df)

summary(m2aa)

m3aa <- glmer(promotion_after_1988 ~ combat_pre_1955 + combat_post_1955 + ( combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), family = "binomial", data = df)

summary(m3aa)

```

## Expanded Model 1 (+ education + parentCCP)

```{r}


m4aa <-  glmer(promotion_after_1949 ~ any_combat + education_level + parent_CCP_leader + (any_combat | cohort_decade) , data = df, family = binomial(link = "logit"))


m5aa <-  glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + (combat_pre_1955 - 1 | cohort_decade) , data = df, family = binomial(link = "logit"))


m6aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

```

## Expanded Model 2 (+ birth province FE)

```{r}

m8aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + (combat_pre_1955 - 1 | cohort_decade), data = df, family = binomial(link = "logit"))

summary(m8aa)

m9aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

summary(m9aa)


```

## Expanded Model 3 (+ network)

```{r}
m11aa <- glmer(promotion_before_1955 ~ combat_pre_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df, family = binomial(link = "logit"))

m12aa <- glmer(promotion_after_1988 ~  combat_pre_1955 + combat_post_1955 + education_level + parent_CCP_leader + deng.network + jiang.network + hu.network + xi.network + as.factor(birth_province) + (combat_pre_1955 + combat_post_1955 - 1 | cohort_decade), data = df %>% filter(promotion_before_1955 != 1), family = binomial(link = "logit"))

```

## Export regression table (3)

```{r}
stargazer(m2aa, m3aa, m5aa, m6aa, m8aa, m9aa, m11aa, m12aa,
          header = FALSE,
          omit = c("birth_province", ".network", "_decade"), type = "latex",
          dep.var.labels = rep(c("Pre-1955", "1988+"),4),
          add.lines=list(c("Birth decade fixed effects", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Birth province fixed effects", "", "", "", "", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$", "$\\checkmark$"),
                         c("Leader networks", "", "", "", "", "", "", "$\\checkmark$", "$\\checkmark$")),
          covariate.labels = c("Pre-1955 Combat exp.", "Post-1955 Combat exp.",
                               "Education", "Parental CCP history"),
          label = "tab:logit-regs",
          title = "Logit Regressions on Pre- and Post-Statebuilding Promotions"#,
          # out = "output/logit-regs-birthcohort-partial-pooling.tex"
          )
```

## IV Regression (Rank \~ Fight \| Eligible) (Promotions in 1955)

### Long March

```{r}

iv_model_long_march_1 <- iv_robust(promotion_before_1955 ~ participated_long_march | 
                                long_march_cohort, 
                              data = df, diagnostics = TRUE)

summary(iv_model_long_march)

# first_stage_long_march <- lm(combat_post_1949 ~ long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_long_march)

```

### Sino-Japanese War

```{r}

iv_model_japan_war_1 <- iv_robust(promotion_before_1955 ~ fought_japan + participated_long_march | combat_japan_cohort + participated_long_march, 
                              data = df, diagnostics = TRUE)

summary(iv_model_japan_war)

# first_stage_japan_war <- lm(combat_post_1949 ~ combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)

# summary(first_stage_japan_war)



```

### Communist Civil War

```{r}

iv_model_civil_war_1 <- iv_robust(promotion_before_1955 ~  fought_communist + participated_long_march + fought_japan | 
                              combat_communist_cohort + participated_long_march + fought_japan, 
                              data = df, diagnostics = TRUE)

summary(iv_model_civil_war)
# 
# first_stage_civil_war <- lm(combat_post_1949 ~ combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_civil_war)

```

### Korean War

```{r}

iv_model_korea_1 <- iv_robust(promotion_before_1955 ~ fought_korea + fought_communist + fought_japan + participated_long_march | combat_korea_cohort + fought_communist + fought_japan + participated_long_march, 
                        data = df, diagnostics = TRUE)

summary(iv_model_korea)

# first_stage_korea <- lm(combat_post_1949 ~ combat_korea_instrument + combat_japan_instrument + combat_communist_instrument + long_march_instrument + age, data = df)
# 
# summary(first_stage_korea)


```

```{r}

library(broom)

coef_long_march <- tidy(iv_model_long_march_1, conf.int = TRUE)[2, ]
coef_japan_war <- tidy(iv_model_japan_war_1, conf.int = TRUE)[2, ]
coef_civil_war <- tidy(iv_model_civil_war_1, conf.int = TRUE)[2, ]
coef_korea <- tidy(iv_model_korea_1, conf.int = TRUE)[2, ]
# coef_india <- tidy(iv_model_india, conf.int = TRUE)[2, ]
# coef_vietnam <- tidy(iv_model_vietnam, conf.int = TRUE)[2, ]

coefficients_1 <- coef_long_march %>% 
  bind_rows(coef_japan_war) %>% 
  bind_rows(coef_civil_war) %>% 
  bind_rows(coef_korea) # %>%
  # bind_rows(coef_india) %>% 
  # bind_rows(coef_vietnam)

coefficients_1$model <- factor(c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War"), levels = c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War"))

plot55 <- coefficients_1 %>% 
  filter(!model %in% c("Sino-India War", "Vietnam War")) %>% 
  ggplot(aes(x = model, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Pre-consolidation (1955)",
    x = NULL,
    y = "Change in Probability of Promotion"
  ) +
  theme_bw() +
    scale_y_continuous(limits = c(-1, .5)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

```

## IV Regression (Rank \~ Fight \| Eligible) (Promotions post 1988)

### Long March

```{r}

iv_model_long_march_2 <- iv_robust(promotion_after_1988 ~ participated_long_march | 
                                long_march_cohort, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_long_march)

# first_stage_long_march <- lm(combat_post_1949 ~ long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_long_march)

```

### Sino-Japanese War

```{r}

iv_model_japan_war_2 <- iv_robust(promotion_after_1988 ~ fought_japan + participated_long_march | combat_japan_cohort + participated_long_march, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_japan_war)

# first_stage_japan_war <- lm(combat_post_1949 ~ combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)

# summary(first_stage_japan_war)



```

### Communist Civil War

```{r}

iv_model_civil_war_2 <- iv_robust(promotion_after_1988 ~  fought_communist + participated_long_march + fought_japan | 
                              combat_communist_cohort + participated_long_march + fought_japan, 
                              data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_civil_war)
# 
# first_stage_civil_war <- lm(combat_post_1949 ~ combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_civil_war)

```

### Korean War

```{r}

iv_model_korea_2 <- iv_robust(promotion_after_1988 ~ fought_korea + fought_communist + fought_japan + participated_long_march | combat_korea_cohort + fought_communist + fought_japan + participated_long_march, 
                        data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_korea)

# first_stage_korea <- lm(combat_post_1949 ~ combat_korea_instrument + combat_japan_instrument + combat_communist_instrument + long_march_instrument + age, data = df)
# 
# summary(first_stage_korea)


```

### Sino-Indian War

```{r}

iv_model_india_2 <- iv_robust(promotion_after_1988 ~ fought_india +fought_korea + fought_communist + fought_japan + participated_long_march | 
                            combat_india_cohort + fought_korea + fought_communist + fought_japan + participated_long_march, 
                          data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_india)

# first_stage_vietnam <- lm(combat_post_1949 ~ combat_vietnam_instrument + combat_korea_instrument + combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_vietnam)
```

### Vietnam War

```{r}

iv_model_vietnam_2 <- iv_robust(promotion_after_1988 ~ fought_vietnam + fought_korea + fought_communist + fought_japan + fought_india + participated_long_march | 
                            combat_vietnam_cohort + fought_korea + fought_communist + fought_japan + fought_india + participated_long_march, 
                          data = df %>% filter(promotion_before_1955 != 1), diagnostics = TRUE)

summary(iv_model_vietnam)

# first_stage_vietnam <- lm(combat_post_1949 ~ combat_vietnam_instrument + combat_korea_instrument + combat_communist_instrument + combat_japan_instrument + long_march_instrument + age + cmc_chair_connection, data = df)
# 
# summary(first_stage_vietnam)

```

```{r}

library(broom)

coef_long_march <- tidy(iv_model_long_march_2, conf.int = TRUE)[2, ]
coef_japan_war <- tidy(iv_model_japan_war_2, conf.int = TRUE)[2, ]
coef_civil_war <- tidy(iv_model_civil_war_2, conf.int = TRUE)[2, ]
coef_korea <- tidy(iv_model_korea_2, conf.int = TRUE)[2, ]
coef_india <- tidy(iv_model_india_2, conf.int = TRUE)[2, ]
coef_vietnam <- tidy(iv_model_vietnam_2, conf.int = TRUE)[2, ]

coefficients_2 <- coef_long_march %>% 
  bind_rows(coef_japan_war) %>% 
  bind_rows(coef_civil_war) %>% 
  bind_rows(coef_korea) %>%
  bind_rows(coef_india) %>% 
  bind_rows(coef_vietnam)

coefficients_2$model <- factor(c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War", "Sino-India War", "Vietnam War"), levels = c("Long March", "Sino-Japanese War", "Civil War" ,"Korean War", "Sino-India War", "Vietnam War"))

plot88 <- coefficients %>% 
  filter(!model %in% c("Sino-India War", "Vietnam War")) %>% 
  ggplot(aes(x = model, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Post-consolidation (1988-)",
    x = NULL,
    y = "Change in Probability of Promotion"
  ) +
  theme_bw() +
  scale_y_continuous(limits = c(-1, .5)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

```

## Combined Plots

```{r}

library(ggpubr)

combined_plot <- ggarrange(plot55 + scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")), plot88 + ylab(NULL)  + scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")))

combined_plot

ggsave(combined_plot, filename = "output/2sls.pdf", height = 4, width = 8)

```

## Pooled Difference Plots

```{r}
coefficients_1 <- coefficients_1 %>% mutate(
  var = std.error ^2,
  type = "before",
)

coefficients_2 <- coefficients_2 %>% mutate(
  var = std.error ^2,
  type = "after"
)

combined_coefficients <- rbind(coefficients_1[, c("term", "estimate", "var", "type")],coefficients_2[1:4, c("term", "estimate", "var", "type")])

c = qnorm(1-.05/2)

diff_df <- pivot_wider(combined_coefficients, id_cols= term, names_from = type,
            values_from = c(estimate, var)) %>% 
  mutate(diff = estimate_after - estimate_before,
         std.err = sqrt(var_before + var_after),
         term = factor(term, levels = term),
         ci.low = diff - std.err * c,
         ci.high = diff + std.err * c)

diff_p <- ggplot(diff_df, aes(x = term, y = diff, ymin = ci.low, ymax = ci.high)) + 
  geom_point() + 
  geom_errorbar(width = .2) + 
  geom_hline(yintercept = 0, color = 'red', linetype = 'dashed') + 
  scale_x_discrete(labels = c("Long March", "Japan", "Civil" ,"Korea")) +
  labs(
    y = "Change in Promotion Probabilities\n(Post minus Pre-consolidation)",
    x = "Combat Experience",
    caption = "Confidence intervals are derived from pooled standard errors\nof the before and after models at the 95% confidence level."
  ) +
  theme_bw()

ggsave(diff_p, filename = "output/diff_plot.pdf", height = 6, width = 8)
```
